library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(countrycode)
# Load the raw survey export; readr guesses the column types and reports them.
df <- readr::read_csv(file = "survey_results_public.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_character(),
## Respondent = col_double(),
## Age = col_double(),
## CompTotal = col_double(),
## ConvertedComp = col_double(),
## WorkWeekHrs = col_double()
## )
## ℹ Use `spec()` for the full column specifications.
# Keep only the survey columns used in the rest of the analysis.
DFF <- dplyr::select(
  df,
  MainBranch, Country, Gender, Age, YearsCode, YearsCodePro, EdLevel,
  Employment, JobSat, OrgSize, WorkWeekHrs, NEWOvertime, NEWOnboardGood,
  JobSeek, NEWLearn, LanguageWorkedWith, PlatformWorkedWith
)

# Professional developers whose country is the United States. `%in%` is never
# NA, so rows with a missing Country or MainBranch are excluded. The two
# columns used for filtering are then dropped, leaving the other 15.
DF_USA <- DFF[
  DFF$Country %in% "United States" &
    DFF$MainBranch %in% "I am a developer by profession",
  !(names(DFF) %in% c("MainBranch", "Country"))
]
# Put a `region` column, derived from each respondent's country name via
# countrycode(), in front of the selected survey columns.
DF_LC <- cbind(
  region = as.character(
    countrycode(sourcevar = DFF$Country,
                origin = "country.name",
                destination = "region")
  ),
  DFF
)
## Warning in countrycode(sourcevar = DFF$Country, origin = "country.name", : Some values were not matched unambiguously: Nomadic
# Professional developers in Latin America & the Caribbean.
# The row mask is built only from DF_LC's own columns (the original mixed in
# DFF$MainBranch, which works only while both objects have identical row
# order). `%in%` returns FALSE for NA, so rows with a missing region or
# MainBranch are excluded without separate is.na() checks. The helper columns
# are dropped by name instead of by the positional range 4:18.
DF_LC <- DF_LC[
  DF_LC$region %in% "Latin America & Caribbean" &
    DF_LC$MainBranch %in% "I am a developer by profession",
  !(names(DF_LC) %in% c("region", "MainBranch", "Country"))
]
# Combined working table: the selected survey columns preceded by the region
# that countrycode() assigns to each respondent's country.
DF_MIX <- cbind(
  region = as.character(
    countrycode(sourcevar = DFF$Country,
                origin = "country.name",
                destination = "region")
  ),
  DFF
)
## Warning in countrycode(sourcevar = DFF$Country, origin = "country.name", : Some values were not matched unambiguously: Nomadic
# Keep professional developers from either Latin America & the Caribbean or
# the United States. `%in%` is never NA, so rows with missing values drop out.
# The first two columns (region, MainBranch) are no longer needed afterwards.
DF_MIX <- DF_MIX[
  (DF_MIX$region %in% "Latin America & Caribbean" |
     DF_MIX$Country %in% "United States") &
    DF_MIX$MainBranch %in% "I am a developer by profession",
  -(1:2)
]

# Collapse Country into two display labels: index 1 (comparison FALSE) is
# every non-US country, index 2 (comparison TRUE) is the United States.
DF_MIX$Country <- c("América Latina", "Estados Unidos")[
  (DF_MIX$Country == "United States") + 1L
]

# The column now holds a two-level grouping, so rename it accordingly.
names(DF_MIX)[names(DF_MIX) == "Country"] <- "Region"
DF_MIX
| Pregunta | Variable | Tipo de Variable |
|---|---|---|
| Which of the following describe you, if any? Please check all that apply. If you prefer not to answer, you may leave this question blank. | Género | Nominal |
| What is your age (in years)? If you prefer not to answer, you may leave this question blank. | Edad | Discreta |
| Including any education, how many years have you been coding in total? | Años codeando | Discreta |
#(strsplit(DF_USA$Gender, ";"))
#pie(DF_USA$Gender, na.rm = T)

# Age distribution of professional developers, one box per region.
# boxplot() has no `na.rm` argument, so the original `na.rm = T` did nothing;
# the formula method already ignores rows with missing values by default.
boxplot(Age ~ Region, data = DF_MIX, xlab = "Region", ylab = "Edad")

# Horizontal reference lines at each group's mean age.
abline(h = mean(DF_USA$Age, na.rm = TRUE), col = "red", lwd = 2)
abline(h = mean(DF_LC$Age, na.rm = TRUE), col = "blue", lwd = 2)

# Legend anchored at x = 1.2, y = 100 in plot coordinates.
# NOTE(review): the label "Media SyC" does not match the "América Latina"
# wording used elsewhere, and the two labels differ in capitalisation; confirm
# the intended text.
legend(
  1.2, 100,
  legend = c("media EEUU", "Media SyC"),
  col = c("red", "blue"),
  lwd = 2
)
Podemos observar que las edades de los programadores profesionales están más concentradas en América Latina y el Caribe comparando los rangos intercuartílicos:
# Interquartile range of age: Latin America & Caribbean first, then the US.
with(DF_LC, IQR(Age, na.rm = TRUE))
## [1] 9
with(DF_USA, IQR(Age, na.rm = TRUE))
## [1] 12
Además, la edad promedio de los programadores profesionales de América Latina es de 30.14, mientras que la de Estados Unidos es de 34.33:
# Mean age rounded to two decimals: Latin America & Caribbean, then the US.
with(DF_LC, round(mean(Age, na.rm = TRUE), digits = 2))
## [1] 30.14
with(DF_USA, round(mean(Age, na.rm = TRUE), digits = 2))
## [1] 34.33
Junto con las medianas, podemos concluir que, en general, los programadores profesionales de Estados Unidos son mayores que los de América Latina.
# Median age rounded to two decimals: Latin America & Caribbean, then the US.
with(DF_LC, round(median(Age, na.rm = TRUE), digits = 2))
## [1] 28.5
with(DF_USA, round(median(Age, na.rm = TRUE), digits = 2))
## [1] 32
# Years of coding experience per region.
# YearsCode is read as text (it is not among the columns readr parsed as
# double), so it is converted with as.numeric(); any non-numeric entry becomes
# NA with a coercion warning and is left out of the plot and the means.
# NOTE(review): confirm whether such text answers should be mapped to numbers
# instead of being discarded.
# boxplot() has no `na.rm` argument, so the original `na.rm = TRUE` did
# nothing; the formula method already ignores missing values by default.
boxplot(
  as.numeric(YearsCode) ~ Region,
  data = DF_MIX,
  xlab = "Region",
  ylab = "Años Codeando"
)

# Mean reference lines, drawn with the same width as in the age plot.
abline(h = mean(as.numeric(DF_USA$YearsCode), na.rm = TRUE), col = "red", lwd = 2)
abline(h = mean(as.numeric(DF_LC$YearsCode), na.rm = TRUE), col = "blue", lwd = 2)